This document demonstrates the use of the bRF and LASSO-D3S functions for integrative GRN inference.

Those functions infer the regulatory pathways of Arabidopsis thaliana’s roots in response to nitrate (N) induction from Varala et al., 2018.

They use as inputs the expression profiles of N-responsive genes and TFBS information. Prior TFBS information was built by searching in the promoters of the N-responsive genes the PWM of the N-responsive regulators.

Data import

Expression data

load('rdata/inference_input_N_response_varala.rdata')
genes <- input_data$grouped_genes; length(genes)
## [1] 1426
tfs <- input_data$grouped_regressors; length(tfs)
## [1] 201
counts <- input_data$counts; dim(counts)
## [1] 1426   45

TFBS data

load("rdata/pwm_occurrences_N_response_varala.rdata")
dim(pwm_occurrence)
## [1] 1426  201

Getting mean expression and variances per genes

mean_expr <- rowMeans(counts)
var_expr <- matrixStats::rowSds(counts)*matrixStats::rowSds(counts)

GRN inference

How does the individual MSE of genes varies with \(\alpha\) for model estimation in GRN inferences?

bRf : biased Random Forests

# ALPHAS <- seq(0,1, by = 0.1)
# lmses <- data.frame(row.names = genes)
# 
# for(alpha in ALPHAS){
#   set.seed(121314)
#   lmses[,as.character(alpha)] <- bRF_inference_MSE(counts, genes, tfs, alpha = alpha, nTrees = 4000,
#                              pwm_occurrence = pwm_occurrence, nCores = 18)
# }
# 
# save(lmses, file = "results/logMSES_RFs_per_genes.rdata")

Clustering the genes :

load(file = "results/logMSES_RFs_per_genes_400trees.rdata")

mse_rf <- (lmses-rowMeans(lmses)) / matrixStats::rowSds(as.matrix(lmses))

ha = HeatmapAnnotation(
    alpha = anno_simple(as.numeric(rep(colnames(mse_rf),1))),
    annotation_name_side = "left")

heatmap_rf_4000 <- Heatmap(mse_rf, row_km = 3, cluster_columns = F, show_row_names = F, 
        name = "scaled logMSE (z-score)", top_annotation = ha,
        row_title = "Genes", column_title = "alpha");heatmap_rf_4000

LASSO-D3S

# lmses_lasso <- data.frame(row.names = genes)
# 
# for(alpha in ALPHAS){
#   set.seed(121314)
#   lmses_lasso[,as.character(alpha)] <- LASSO.D3S_inference_MSE(counts, genes, tfs,
#                                                                normalized = TRUE, 
#                                                                alpha = alpha, N = 50,
#                                pwm_occurrence = pwm_occurrence, nCores = 15)
# }
# 
# save(lmses_lasso, file = "results/logMSES_Lasso_per_genes.rdata")
load(file = "results/logMSES_Lasso_per_genes.rdata")
mse_lasso <- (lmses_lasso-rowMeans(lmses_lasso)) / matrixStats::rowSds(as.matrix(lmses_lasso))

heatmap_lasso <- Heatmap(mse_lasso, row_km = 4, cluster_columns = F, show_row_names = F, 
        name = "scaled logMSE (z-score)", top_annotation = ha,
        row_title = "Genes", column_title = "alpha"); heatmap_lasso

Clustering des gènes suivant la plage de alphas sur laquelle ils atteignent leur minimum

average_genes <- function(mse){
  new_mse <- mse
  new_mse[,"0-0.2"] <- rowMeans(new_mse[c("0", "0.1", "0.2")])
  new_mse[,"0.3-0.6"] <- rowMeans(new_mse[c("0.3", "0.4", "0.5", "0.6")])
  new_mse[,"0.7-1"] <- rowMeans(new_mse[c("0.7", "0.8", "0.9", "1")])
  return(new_mse[c("0-0.2", "0.3-0.6", "0.7-1")])
}

smooth_rf <- average_genes(lmses)
smooth_lasso <- average_genes(lmses_lasso)


get_optimum <- function(gene, method = "rf"){
  if(method=="rf")
    return(names(which.min(smooth_rf[gene,])))
  if(method=="lasso")
    return(names(which.min(smooth_lasso[gene,])))
}
gene <- rownames(smooth_rf)[1]

clusters_rf <- sapply(genes, get_optimum, method = "rf")
clusters_lasso <- sapply(genes, get_optimum, method = "lasso")
table(clusters_rf); table(clusters_lasso)
## clusters_rf
##   0-0.2 0.3-0.6   0.7-1 
##     859     295     272
## clusters_lasso
##   0-0.2 0.3-0.6   0.7-1 
##     990     279     157

Représentation de ces groupes sur les heatmap initiales

Heatmap(mse_rf, 
        row_km = 3, cluster_columns = F, show_row_names = F, 
        name = "MSE/Var", top_annotation = ha, 
        row_title = "Genes")+ rowAnnotation(
    clusters_rf = clusters_rf,
    col=list(clusters_rf= setNames(c("darkorange", "yellow", "green"), 
                                       nm = names(table(clusters_rf))) ))

Heatmap(mse_lasso, 
        row_km = 3, cluster_columns = F, show_row_names = F, 
        name = "MSE/Var", top_annotation = ha, 
        row_title = "Genes")+ rowAnnotation(
    clusters_lasso = clusters_lasso,
    col=list(clusters_lasso= setNames(c("darkorange", "yellow", "green"), 
                                       nm = names(table(clusters_lasso))) ))

Quelles sont les caractéristiques des clusters de gènes

Nombre de motifs dans le promoteur, niveau d’expression, ontologies, sont-ils des TFs? leur taille?

Quelle est la précision rappel des sous réseaux concernant ces gènes.

load("rdata/pwm_prom_jaspar_dap.rdata")
library(patchwork)

genes_info <- data.frame(genes = genes, 
                         cluster_lasso = clusters_lasso[genes], 
                         cluster_rf = clusters_rf[genes])
genes_info$is_tf <- genes %in% tfs
genes_info$var <- var_expr
genes_info$expr <- mean_expr
genes_info$nb_motifs <- table(pwm_prom$target)[genes]

genes_info%>%
  ggplot(aes(x=cluster_rf, y=nb_motifs)) + 
  geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
  geom_boxplot(width=0.1, fill = "white")+
  ggtitle(("Number of motifs in promoter for RF groups")) + 
  stat_compare_means() + genes_info%>%
  ggplot(aes(x=cluster_lasso, y=nb_motifs)) + 
  geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
  geom_boxplot(width=0.1, fill = "white")+
  ggtitle(("Number of motifs in promoter for LASSO groups")) +
  stat_compare_means() 
## Don't know how to automatically pick scale for object of type <table>.
## Defaulting to continuous.
## Don't know how to automatically pick scale for object of type <table>.
## Defaulting to continuous.

genes_info%>%
  ggplot(aes(x=cluster_rf, y=var)) + 
  geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
  geom_boxplot(width=0.1, fill = "white")+
  ggtitle(("Gene variance for RF groups")) + scale_y_log10()+
  stat_compare_means()+
genes_info%>%
  ggplot(aes(x=cluster_lasso, y=var)) + 
  geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
  geom_boxplot(width=0.1, fill = "white")+
  ggtitle(("Gene variance for LASSO groups")) + scale_y_log10()+
  stat_compare_means()

genes_info%>%
  ggplot(aes(x=cluster_rf, y=expr)) + 
  geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
  geom_boxplot(width=0.1, fill = "white")+
  ggtitle(("Gene expression for RF groups")) + scale_y_log10()+
  stat_compare_means()+
genes_info%>%
  ggplot(aes(x=cluster_lasso, y=expr)) + 
  geom_violin(fill="darkblue", alpha=0.2)+geom_jitter(width=0.1)+
  geom_boxplot(width=0.1, fill = "white")+
  ggtitle(("Gene expression for LASSO groups")) + scale_y_log10()+
  stat_compare_means()

genes_info %>%
  group_by(cluster_rf) %>%
  summarise(n=n(), 
            tf_frac=sum(is_tf)/n()) %>%
  ggplot(aes(x=cluster_rf, y=tf_frac, 
                          label = paste("n=",n))) + 
  geom_bar(stat = "identity", aes(fill=tf_frac), alpha = 1)+
  geom_hline(yintercept = length(tfs)/length(genes)) + 
  geom_text(y=0.2) + xlab("cluster RF") +
  ggtitle("Fraction of TFs in RF groups")+ylim(c(0,0.2))+
genes_info %>%
  group_by(cluster_lasso) %>%
  summarise(n=n(), 
            tf_frac=sum(is_tf)/n()) %>%
  ggplot(aes(x=cluster_lasso, y=tf_frac, 
                          label = paste("n=",n))) + 
  geom_bar(stat = "identity",  aes(fill=tf_frac), alpha = 1)+
  geom_hline(yintercept = length(tfs)/length(genes)) + 
  geom_text(y=0.2) + xlab("cluster LASSO") +ylim(c(0,0.2))+
  ggtitle("Fraction of TFs in LASSO groups")

Est-ce qu’on a significativement plus de TFs dans les genes qui sont bien prédits avec les PWM chez le lasso?

fisher.test(matrix(c(length(genes), length(tfs), 157, 157*0.1910828), 
                   nrow = 2), alternative = "greater")
## 
##  Fisher's Exact Test for Count Data
## 
## data:  matrix(c(length(genes), length(tfs), 157, 157 * 0.1910828), nrow = 2)
## p-value = 0.09628
## alternative hypothesis: true odds ratio is greater than 1
## 95 percent confidence interval:
##  0.9255823       Inf
## sample estimates:
## odds ratio 
##   1.355392

Precision and recall of inferred GRNs subsets for different groups

load("results/networks_bRF_lasso.rdata")
edges <- edges[str_detect(names(edges), "0.075|0.05|0.01")]
color_palette = c( "#EC5D2F","#FE945C", "#FFC08E" )
settings <- c("method", "alpha", "density", "rep")
prettyZero <- function(l){
    max.decimals = max(nchar(str_extract(l, "\\.[0-9]+")), na.rm = T)-1
    lnew = formatC(l, replace.zero = T, zero.print = "0",
        digits = max.decimals, format = "f", preserve.width=T)
    return(lnew)}

get_precision <- function(group, method){
  if(method == "lasso")
    genes <- names(which(clusters_lasso == group))
  if(method == "rf")
    genes <- names(which(clusters_rf == group))
  
  subset <- lapply(edges, function(net){net[net$to %in% genes,]})

  val_conn <-
    evaluate_networks(
      subset,
      validation = c("TARGET", "CHIPSeq", "DAPSeq"),
      nCores = 10,
      input_genes = genes,
      input_tfs = tfs
    )
  val_conn[, settings] <-
    str_split_fixed(val_conn$network_name, '_', length(settings))
  val_conn$group <- group
  val_conn$method_mse <- method
  
  if(method == "rf")
    val_conn<-val_conn[val_conn$method != "LASSO-D3S",]
  if(method == "lasso")
    val_conn<-val_conn[val_conn$method == "LASSO-D3S",]
  return(val_conn)
}

precision <- data.frame()
for(group in unique(clusters_lasso)){
  for(method in c("rf", "lasso")){
    if(nrow(precision)==0)
      precision<- get_precision(group, method)
    else
      precision <- rbind.data.frame(precision, get_precision(group, method))
  }
}
## 
## Parallel network validation with AranetBench Using 10 cores.
## 4.225 sec elapsed
## 
## Parallel network validation with AranetBench Using 10 cores.
## 3.665 sec elapsed
## 
## Parallel network validation with AranetBench Using 10 cores.
## 7.044 sec elapsed
## 
## Parallel network validation with AranetBench Using 10 cores.
## 9.502 sec elapsed
## 
## Parallel network validation with AranetBench Using 10 cores.
## 5.585 sec elapsed
## 
## Parallel network validation with AranetBench Using 10 cores.
## 5.274 sec elapsed
(precision %>%
  dplyr::select(-network_name) %>%
  mutate(alpha = as.numeric(alpha)) %>%
  ggplot(aes(color = density, x = alpha, y = precision)) +
  ggh4x::facet_nested_wrap(vars(method, group), ncol = 8, nest_line = T) + geom_point() +
  geom_smooth(aes(fill = density), alpha = 0.1) +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'top'
  ) +
  xlab(expression(alpha)) + ylab("Precision") +
  ggtitle("Precision against ConnecTF") +
  scale_x_continuous(labels = prettyZero) +
  scale_color_manual(name = "Network density", values = color_palette) +
  scale_fill_manual(name = "Network density", values = color_palette))/(
  precision %>%
  dplyr::select(-network_name) %>%
  mutate(alpha = as.numeric(alpha)) %>%
  ggplot(aes(color = density, x = alpha, y = recall)) +
  ggh4x::facet_nested_wrap(vars(method, group), ncol = 8, nest_line = T) + geom_point() +
  geom_smooth(aes(fill = density), alpha = 0.1) +
  theme(
    strip.background = element_blank(),
    axis.title.x = element_text(size = 22),
    title = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.text = element_text(size = 15),
    axis.text = element_text(size = 12),
    legend.position = 'none'
  ) +
  xlab(expression(alpha)) + ylab("Recall") +
  ggtitle("Recall against ConnecTF") +
  scale_x_continuous(labels = prettyZero) +
  scale_color_manual(name = "Network density", values = color_palette) +
  scale_fill_manual(name = "Network density", values = color_palette))
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

library(DIANE)
## 
## Attaching package: 'DIANE'
## The following object is masked from 'package:igraph':
## 
##     normalize
background <- convert_from_agi(genes)
## 
for(group in unique(clusters_lasso)){
  
  genes_i <- names(which(clusters_lasso == group))
  
  print(paste("LASSO", length(genes_i), "genes,", group, "\n"))
  genes_i <- convert_from_agi(genes_i)
  go_lasso <- enrich_go(genes_i, background)
  DIANE::draw_enrich_go(go_lasso)
  print(go_lasso)
  genes_i <- names(which(clusters_rf == group))
  print(paste("RF", length(genes_i), "genes, min mse at", group))
  genes_i <- convert_from_agi(genes_i)
  go_rf <- enrich_go(genes_i, background)
  DIANE::draw_enrich_go(go_rf)
  print(go_rf)
  
  # common_go <- intersect(go_lasso$ID, go_rf$ID)
  # print(go_lasso[common_go,])
}
## [1] "LASSO 157 genes, 0.7-1 \n"
## 
## [1] ID          Description GeneRatio   BgRatio     pvalue      p.adjust   
## [7] qvalue      geneID      Count      
## <0 rows> (or 0-length row.names)
## [1] "RF 272 genes, min mse at 0.7-1"
##                    ID                              Description GeneRatio
## GO:0050896 GO:0050896                     response to stimulus    98/225
## GO:0042221 GO:0042221                     response to chemical    68/225
## GO:0051716 GO:0051716            cellular response to stimulus    55/225
## GO:0010033 GO:0010033            response to organic substance    48/225
## GO:0070887 GO:0070887   cellular response to chemical stimulus    40/225
## GO:0009719 GO:0009719          response to endogenous stimulus    35/225
## GO:0007165 GO:0007165                      signal transduction    35/225
## GO:0009725 GO:0009725                      response to hormone    34/225
## GO:0071310 GO:0071310   cellular response to organic substance    28/225
## GO:0033993 GO:0033993                        response to lipid    24/225
## GO:0071495 GO:0071495 cellular response to endogenous stimulus    21/225
## GO:0032870 GO:0032870    cellular response to hormone stimulus    20/225
## GO:1901698 GO:1901698            response to nitrogen compound    13/225
##             BgRatio       pvalue    p.adjust      qvalue
## GO:0050896 401/1192 3.742682e-04 0.033414812 0.030127501
## GO:0042221 227/1192 4.162561e-06 0.001973054 0.001778947
## GO:0051716 203/1192 1.015654e-03 0.040118342 0.036171545
## GO:0010033 147/1192 1.321293e-05 0.002843887 0.002564108
## GO:0070887 120/1192 4.754524e-05 0.005634111 0.005079834
## GO:0009719 111/1192 5.325665e-04 0.033414812 0.030127501
## GO:0007165 115/1192 1.120333e-03 0.040849055 0.036830371
## GO:0009725 107/1192 5.498441e-04 0.033414812 0.030127501
## GO:0071310  70/1192 1.799928e-05 0.002843887 0.002564108
## GO:0033993  69/1192 9.332992e-04 0.040118342 0.036171545
## GO:0071495  56/1192 6.344585e-04 0.033414812 0.030127501
## GO:0032870  52/1192 5.850858e-04 0.033414812 0.030127501
## GO:1901698  28/1192 7.056834e-04 0.033449395 0.030158681
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        geneID
## GO:0050896 AtNIGT1/AtGDPD2/NA/STY46/ATBCA4/DIN10/ATMPK18/ATRLP57/B73/TGA1/AtbZIP63/PIP2;3/KFB01/AKS3/UGT76B1/ABF3/ARR9/ATWL2/ASG1/ARR4/ATNRT3.1/MRF1/NA/ABS3/ATGA3OX1/COP3/NA/ACH1/STOP2/ATSUC1/ATTLP9/CAR9/ATWRKY18/GBF3/5PTASE2/NA/SDI2/NA/NA/DHAR2/AtATL15/MIZ1/HYH/ACO2/GIS3/ATSEN1/ARR3/ATIRT3/EMB2728/ATPLC4/VQ18/LSU3/ATGLR2.2/FAR4/NA/RHA2A/NA/RHA2B/NA/NA/RFI2/AtRLP24/AT-RSH2/DLO1/PYE/ATMKP2/BIP/APRR9/SAUR37/ATRMA1/ATMPK19/NHL13/SAP2/ATNCED3/NA/NA/BIP1/ATSTP13/ATWRKY57/NA/EGR2/KFB20/AHB1/AtCIPK16/AtNPF5.2/ATMYB59/ATOXS3/ATBT4/AtIDD14/PP2CG1/NA/NA/AtKFB50/P5CS2/ATRMA2/ATCTL1/OXR2/AtDWF4
## GO:0042221                                                                                                                                                                          AtNIGT1/AtGDPD2/NA/STY46/ATBCA4/DIN10/B73/AtbZIP63/PIP2;3/KFB01/UGT76B1/ABF3/ARR9/ASG1/ARR4/ATNRT3.1/NA/ABS3/ATGA3OX1/COP3/NA/ACH1/STOP2/CAR9/ATWRKY18/GBF3/5PTASE2/NA/NA/NA/DHAR2/AtATL15/GIS3/ATSEN1/ARR3/VQ18/LSU3/ATGLR2.2/RHA2A/RHA2B/AT-RSH2/DLO1/ATMKP2/BIP/SAUR37/ATRMA1/SAP2/ATNCED3/NA/BIP1/ATSTP13/ATWRKY57/EGR2/KFB20/AHB1/AtNPF5.2/ATMYB59/ATOXS3/ATBT4/PP2CG1/NA/NA/AtKFB50/P5CS2/ATRMA2/ATCTL1/OXR2/AtDWF4
## GO:0051716                                                                                                                                                                                                                                                                       AtNIGT1/AtGDPD2/NA/STY46/DIN10/ATMPK18/ATRLP57/B73/AtbZIP63/KFB01/UGT76B1/ABF3/ARR9/ATWL2/ARR4/ATGA3OX1/COP3/ACH1/CAR9/NA/SDI2/NA/DHAR2/HYH/GIS3/ARR3/ATPLC4/VQ18/LSU3/ATGLR2.2/NA/RHA2A/RHA2B/RFI2/AtRLP24/PYE/ATMKP2/BIP/APRR9/ATRMA1/ATMPK19/SAP2/NA/BIP1/NA/KFB20/AHB1/AtCIPK16/ATMYB59/NA/NA/AtKFB50/ATRMA2/OXR2/AtDWF4
## GO:0010033                                                                                                                                                                                                                                                                                                 AtNIGT1/STY46/ATBCA4/B73/AtbZIP63/KFB01/UGT76B1/ABF3/ARR9/ASG1/ARR4/NA/ATGA3OX1/COP3/CAR9/ATWRKY18/GBF3/5PTASE2/NA/NA/NA/DHAR2/AtATL15/GIS3/ATSEN1/ARR3/VQ18/ATGLR2.2/RHA2A/RHA2B/AT-RSH2/DLO1/BIP/SAUR37/ATRMA1/NA/BIP1/ATSTP13/KFB20/AtNPF5.2/ATBT4/PP2CG1/NA/AtKFB50/P5CS2/ATRMA2/ATCTL1/AtDWF4
## GO:0070887                                                                                                                                                                                                                                                                                                                                                              AtNIGT1/AtGDPD2/NA/STY46/DIN10/B73/AtbZIP63/KFB01/UGT76B1/ABF3/ARR9/ARR4/ATGA3OX1/COP3/ACH1/CAR9/NA/NA/DHAR2/GIS3/ARR3/VQ18/LSU3/ATGLR2.2/RHA2A/RHA2B/ATMKP2/BIP/ATRMA1/SAP2/BIP1/KFB20/AHB1/ATMYB59/NA/NA/AtKFB50/ATRMA2/OXR2/AtDWF4
## GO:0009719                                                                                                                                                                                                                                                                                                                                                                            AtNIGT1/STY46/B73/AtbZIP63/KFB01/ABF3/ARR9/ASG1/ARR4/ATGA3OX1/COP3/CAR9/GBF3/5PTASE2/NA/NA/GIS3/ATSEN1/ARR3/VQ18/ATGLR2.2/RHA2A/RHA2B/AT-RSH2/SAUR37/ATSTP13/KFB20/AtNPF5.2/ATBT4/PP2CG1/NA/AtKFB50/P5CS2/ATCTL1/AtDWF4
## GO:0007165                                                                                                                                                                                                                                                                                                                                                                                AtNIGT1/ATMPK18/ATRLP57/B73/KFB01/UGT76B1/ABF3/ARR9/ATWL2/ARR4/ATGA3OX1/COP3/CAR9/NA/DHAR2/HYH/GIS3/ARR3/ATPLC4/ATGLR2.2/RHA2A/RHA2B/RFI2/AtRLP24/ATMKP2/BIP/APRR9/ATMPK19/BIP1/NA/KFB20/AtCIPK16/NA/AtKFB50/AtDWF4
## GO:0009725                                                                                                                                                                                                                                                                                                                                                                                     AtNIGT1/STY46/B73/AtbZIP63/KFB01/ABF3/ARR9/ASG1/ARR4/ATGA3OX1/COP3/CAR9/GBF3/5PTASE2/NA/NA/GIS3/ATSEN1/ARR3/VQ18/RHA2A/RHA2B/AT-RSH2/SAUR37/ATSTP13/KFB20/AtNPF5.2/ATBT4/PP2CG1/NA/AtKFB50/P5CS2/ATCTL1/AtDWF4
## GO:0071310                                                                                                                                                                                                                                                                                                                                                                                                                                AtNIGT1/B73/AtbZIP63/KFB01/UGT76B1/ABF3/ARR9/ARR4/ATGA3OX1/COP3/CAR9/NA/NA/DHAR2/GIS3/ARR3/VQ18/ATGLR2.2/RHA2A/RHA2B/BIP/ATRMA1/BIP1/KFB20/NA/AtKFB50/ATRMA2/AtDWF4
## GO:0033993                                                                                                                                                                                                                                                                                                                                                                                                                                               AtNIGT1/STY46/AtbZIP63/ABF3/ASG1/ATGA3OX1/CAR9/GBF3/5PTASE2/NA/NA/GIS3/ATSEN1/VQ18/RHA2A/RHA2B/AT-RSH2/ATSTP13/AtNPF5.2/ATBT4/PP2CG1/NA/P5CS2/AtDWF4
## GO:0071495                                                                                                                                                                                                                                                                                                                                                                                                                                                                        AtNIGT1/B73/AtbZIP63/KFB01/ABF3/ARR9/ARR4/ATGA3OX1/COP3/CAR9/NA/GIS3/ARR3/VQ18/ATGLR2.2/RHA2A/RHA2B/KFB20/NA/AtKFB50/AtDWF4
## GO:0032870                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 AtNIGT1/B73/AtbZIP63/KFB01/ABF3/ARR9/ARR4/ATGA3OX1/COP3/CAR9/NA/GIS3/ARR3/VQ18/RHA2A/RHA2B/KFB20/NA/AtKFB50/AtDWF4
## GO:1901698                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     AtNIGT1/ATNRT3.1/ACH1/NA/NA/ATGLR2.2/BIP/ATRMA1/NA/BIP1/AtNPF5.2/ATRMA2/ATCTL1
##            Count
## GO:0050896    98
## GO:0042221    68
## GO:0051716    55
## GO:0010033    48
## GO:0070887    40
## GO:0009719    35
## GO:0007165    35
## GO:0009725    34
## GO:0071310    28
## GO:0033993    24
## GO:0071495    21
## GO:0032870    20
## GO:1901698    13
## [1] "LASSO 990 genes, 0-0.2 \n"
## [1] ID          Description GeneRatio   BgRatio     pvalue      p.adjust   
## [7] qvalue      geneID      Count      
## <0 rows> (or 0-length row.names)
## [1] "RF 859 genes, min mse at 0-0.2"
## [1] ID          Description GeneRatio   BgRatio     pvalue      p.adjust   
## [7] qvalue      geneID      Count      
## <0 rows> (or 0-length row.names)
## [1] "LASSO 279 genes, 0.3-0.6 \n"
## [1] ID          Description GeneRatio   BgRatio     pvalue      p.adjust   
## [7] qvalue      geneID      Count      
## <0 rows> (or 0-length row.names)
## [1] "RF 295 genes, min mse at 0.3-0.6"
##                    ID                        Description GeneRatio  BgRatio
## GO:0048731 GO:0048731                 system development    50/256 154/1192
## GO:0000003 GO:0000003                       reproduction    40/256 114/1192
## GO:0022414 GO:0022414               reproductive process    40/256 114/1192
## GO:0006955 GO:0006955                    immune response    22/256  50/1192
## GO:0002376 GO:0002376              immune system process    22/256  51/1192
## GO:0098542 GO:0098542 defense response to other organism    20/256  46/1192
##                  pvalue   p.adjust     qvalue
## GO:0048731 0.0004408213 0.04254626 0.04093893
## GO:0000003 0.0002974878 0.03801988 0.03658354
## GO:0022414 0.0002974878 0.03801988 0.03658354
## GO:0006955 0.0002189647 0.03801988 0.03658354
## GO:0002376 0.0003110010 0.03801988 0.03658354
## GO:0098542 0.0005220400 0.04254626 0.04093893
##                                                                                                                                                                                                                                                                                                                      geneID
## GO:0048731 ABCG14/GAPCP-2/ASL39/PTL/AAE3/GAT/DVL4/UMAMIT11/MDH/CYP78A9/MEE49/MAKR5/AHK1/ROT3/MEE31/DPG1/RGA/ACH2/AFB3/AtLARP1b/AtMAX2/BLH2/At2-MMP/RBL/RBL/ACBP/ATXTH28/PIPL3/BZR1/emb1379/ATXIB/ATMYB68/ATWRKY2/AGC1-1/AHL29/ATERS/ExAD/OVA1/UMAMIT14/BPE/IAA30/GRP23/WOX5/ATPRMT10/AtCLE6/ANAC029/MUCI70/HAE/ADO3/ATMYB61
## GO:0000003                                                                   ABCG14/PTL/AAE3/UMAMIT11/MDH/CYP78A9/MEE49/AHK1/ROT3/MEE31/DPG1/RGA/AFB3/At2-MMP/ARK3/APK3/RBL/RBL/ACBP/ATXTH28/BZR1/emb1379/ATCBL9/BUBR1/ATWRKY2/AHL29/ATERS/OVA1/ATB'/UMAMIT14/BPE/IAA30/GRP23/ATPRMT10/ANAC029/EDA5/MUCI70/HAE/ADO3/ATMYB61
## GO:0022414                                                                   ABCG14/PTL/AAE3/UMAMIT11/MDH/CYP78A9/MEE49/AHK1/ROT3/MEE31/DPG1/RGA/AFB3/At2-MMP/ARK3/APK3/RBL/RBL/ACBP/ATXTH28/BZR1/emb1379/ATCBL9/BUBR1/ATWRKY2/AHL29/ATERS/OVA1/ATB'/UMAMIT14/BPE/IAA30/GRP23/ATPRMT10/ANAC029/EDA5/MUCI70/HAE/ADO3/ATMYB61
## GO:0006955                                                                                                                                                                         ABCG14/AAE3/OBF4/MDH/ATTLP1/EIF(ISO)4E/POQR/AT-POX/PUB25/CSA1/PBL36/At2-MMP/MOS4/MPL1/NA/HR3/NA/SNIPER4/LecRK-I.10/ATEXO70E2/HAE/AtMYB28
## GO:0002376                                                                                                                                                                         ABCG14/AAE3/OBF4/MDH/ATTLP1/EIF(ISO)4E/POQR/AT-POX/PUB25/CSA1/PBL36/At2-MMP/MOS4/MPL1/NA/HR3/NA/SNIPER4/LecRK-I.10/ATEXO70E2/HAE/AtMYB28
## GO:0098542                                                                                                                                                                                         ABCG14/AAE3/OBF4/MDH/ATTLP1/EIF(ISO)4E/POQR/AT-POX/PUB25/CSA1/At2-MMP/MOS4/MPL1/NA/HR3/NA/SNIPER4/LecRK-I.10/HAE/AtMYB28
##            Count
## GO:0048731    50
## GO:0000003    40
## GO:0022414    40
## GO:0006955    22
## GO:0002376    22
## GO:0098542    20
go_all <- enrich_go(convert_from_agi(genes), convert_from_agi(rownames(gene_annotations$`Arabidopsis thaliana`)))
draw_enrich_go(go_all)

Rien de spécial n’est trouvé en lien avec les ontologies enrichies dans certains groupes, la liste de base est enrichie en ribosomal functions, et ça se retrouve dans des sous groupes comparés à l’ensemble du génome. comparés à la liste de base, les sous groups ne sont quasiment pas enrichis…

Profils d’expression

expression <- data.frame(counts)
expression <- (expression-rowMeans(expression)) / matrixStats::rowSds(as.matrix(expression))


Heatmap(expression, cluster_columns = F, show_row_names = F)+ rowAnnotation(
    clusters_lasso = clusters_lasso, clusters_rf = clusters_rf,
    col=list(clusters_lasso= setNames(c("darkorange", "yellow", "green"), 
                                       nm = names(table(clusters_lasso))),
             clusters_rf= setNames(c("darkorange", "yellow", "green"), 
                                       nm = names(table(clusters_rf)))))

expression$genes <- rownames(expression)

reshape2::melt(expression) %>%
  mutate(time = extract_numeric(str_split_fixed(variable, '_', 2)[,1]),
         rep = str_split_fixed(variable, '_', 2)[,2],
         trt = substr(variable,1,1),
         group_lasso = clusters_lasso[genes],
         group_rf = clusters_rf[genes]) %>%
  filter(trt!="T")%>%
  ggplot(aes(x=time, y=value, col = group_lasso, group =genes))+
  geom_line(alpha = 0.1) + 
  facet_wrap(group_lasso~ trt, nrow = 1) 
## Using genes as id variables
## extract_numeric() is deprecated: please use readr::parse_number() instead

reshape2::melt(expression) %>%
  mutate(time = extract_numeric(str_split_fixed(variable, '_', 2)[,1]),
         rep = str_split_fixed(variable, '_', 2)[,2],
         trt = substr(variable,1,1),
         group_lasso = clusters_lasso[genes],
         group_rf = clusters_rf[genes]) %>%
  filter(trt!="T")%>%
  ggplot(aes(x=time, y=value, col = group_rf, group =genes))+
  geom_line(alpha = 0.1) + 
  facet_wrap(group_rf~ trt, nrow = 1) 
## Using genes as id variables
## extract_numeric() is deprecated: please use readr::parse_number() instead